/*
 * Copyright (C) 2017 Waldemar Brodkorb <wbx@uclibc-ng.org>
 * Ported from GNU C Library
 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
 */

/* Thread-local storage handling in the ELF dynamic linker.
   AArch64 version.
   Copyright (C) 2011-2017 Free Software Foundation, Inc.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>

#if defined __UCLIBC_HAS_TLS__

#include <tls.h>
#include "tlsdesc.h"

/* PTR_REG (n) pastes N onto "x", giving the 64-bit name of general
   register N (PTR_REG (zr) yields xzr).  */
#define PTR_REG(n)	x##n

/* log2 of the pointer size in bytes, usable as a shift amount.  */
#define PTR_LOG_SIZE	3

/* Pointer size in bytes, derived from PTR_LOG_SIZE.  */
#define PTR_SIZE	(1 << PTR_LOG_SIZE)

/* Number of Q register pairs covering q0..q31; each pair takes 32 bytes
   of stack.  */
#define NSAVEDQREGPAIRS	16

/* Spill q0..q31 to the stack.
   The first store is pre-indexed: it lowers sp by 32*NSAVEDQREGPAIRS bytes
   and writes q0/q1 at the new sp.  The CFA adjustment recording that
   allocation follows immediately; the remaining pairs go to consecutive
   32-byte slots above.  */
#define SAVE_Q_REGISTERS					\
	stp	q0,  q1,  [sp, #-(32*NSAVEDQREGPAIRS)]!;	\
	cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS);		\
	stp	q2,  q3,  [sp, #(32*1)];			\
	stp	q4,  q5,  [sp, #(32*2)];			\
	stp	q6,  q7,  [sp, #(32*3)];			\
	stp	q8,  q9,  [sp, #(32*4)];			\
	stp	q10, q11, [sp, #(32*5)];			\
	stp	q12, q13, [sp, #(32*6)];			\
	stp	q14, q15, [sp, #(32*7)];			\
	stp	q16, q17, [sp, #(32*8)];			\
	stp	q18, q19, [sp, #(32*9)];			\
	stp	q20, q21, [sp, #(32*10)];			\
	stp	q22, q23, [sp, #(32*11)];			\
	stp	q24, q25, [sp, #(32*12)];			\
	stp	q26, q27, [sp, #(32*13)];			\
	stp	q28, q29, [sp, #(32*14)];			\
	stp	q30, q31, [sp, #(32*15)];

/* Reload q0..q31 from the area written by SAVE_Q_REGISTERS.
   q2..q31 are read from their 32-byte slots first; the final load of
   q0/q1 is post-indexed, so it also releases the 32*NSAVEDQREGPAIRS bytes,
   and the matching negative CFA adjustment follows it.  */
#define RESTORE_Q_REGISTERS					\
	ldp	q2,  q3,  [sp, #(32*1)];			\
	ldp	q4,  q5,  [sp, #(32*2)];			\
	ldp	q6,  q7,  [sp, #(32*3)];			\
	ldp	q8,  q9,  [sp, #(32*4)];			\
	ldp	q10, q11, [sp, #(32*5)];			\
	ldp	q12, q13, [sp, #(32*6)];			\
	ldp	q14, q15, [sp, #(32*7)];			\
	ldp	q16, q17, [sp, #(32*8)];			\
	ldp	q18, q19, [sp, #(32*9)];			\
	ldp	q20, q21, [sp, #(32*10)];			\
	ldp	q22, q23, [sp, #(32*11)];			\
	ldp	q24, q25, [sp, #(32*12)];			\
	ldp	q26, q27, [sp, #(32*13)];			\
	ldp	q28, q29, [sp, #(32*14)];			\
	ldp	q30, q31, [sp, #(32*15)];			\
	ldp	q0,  q1,  [sp], #(32*NSAVEDQREGPAIRS);		\
	cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS);

	.text

	/* Compute the thread pointer offset for symbols in the static
	   TLS block. The offset is the same for all threads.
	   Prototype:
	   _dl_tlsdesc_return (tlsdesc *) ;

	   In:   x0 = address of the descriptor.
	   Out:  x0 = the pointer-sized word stored at offset PTR_SIZE
		 (the second word) of the descriptor.
	   No other register, no flags and no stack memory are written.

	   The function is wrapped in cfi_startproc/cfi_endproc, like
	   _dl_tlsdesc_dynamic, so that it gets an unwind entry; the CFA
	   never changes because sp is not touched.
	 */
	.hidden _dl_tlsdesc_return
	.global	_dl_tlsdesc_return
	.type	_dl_tlsdesc_return,%function
	.align 2
_dl_tlsdesc_return:
	cfi_startproc
	ldr	PTR_REG (0), [x0, #PTR_SIZE]	/* x0 = second word of *x0.  */
	ret
	cfi_endproc
	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return

#ifdef SHARED
	/* Handler for dynamic TLS symbols.
	   Prototype:
	   _dl_tlsdesc_dynamic (tlsdesc *) ;

	   The second word of the descriptor points to a
	   tlsdesc_dynamic_arg structure.

	   Returns the offset between the thread pointer and the
	   object referenced by the argument.

	   ptrdiff_t
	   __attribute__ ((__regparm__ (1)))
	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
	   {
	     struct tlsdesc_dynamic_arg *td = tdp->arg;
	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
	     if (__builtin_expect (td->gen_count <= dtv[0].counter
		&& (dtv[td->tlsinfo.ti_module].pointer.val
		    != TLS_DTV_UNALLOCATED),
		1))
	       return dtv[td->tlsinfo.ti_module].pointer.val
		+ td->tlsinfo.ti_offset
		- __thread_pointer;

	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
	   }

	   Register roles on the fast path:
	     x4     = thread pointer (tpidr_el0)
	     x1     = td, later td->tlsinfo.ti_offset
	     x0     = dtv, then the DTV entry value, then the result
	     x2, x3 = scratch
	   x1-x4, x29 and x30 are stored on entry and reloaded before the
	   ret.  The slow path additionally stores and reloads x5-x18 and
	   q0-q31 around the call to __tls_get_addr.  The condition flags
	   are modified (cmp) and not restored.

	   Fast-path frame, FASTPATH_FRAME_SIZE bytes:
	     [sp, #0]   x29, x30
	     [sp, #16]  not written by this function
	     [sp, #32]  x1, x2
	     [sp, #48]  x3, x4
	 */

	.hidden _dl_tlsdesc_dynamic
	.global	_dl_tlsdesc_dynamic
	.type	_dl_tlsdesc_dynamic,%function
	cfi_startproc
	.align 2
_dl_tlsdesc_dynamic:
# define FASTPATH_FRAME_SIZE (32+16*2)
	stp	x29, x30, [sp,#-FASTPATH_FRAME_SIZE]!
	cfi_adjust_cfa_offset (FASTPATH_FRAME_SIZE)
	mov	x29, sp

	/* Save just enough registers to support fast path, if we fall
	   into slow path we will save additional registers.  */

	stp	x1,  x2, [sp, #32+16*0]
	stp	x3,  x4, [sp, #32+16*1]

	mrs	x4, tpidr_el0
	/* The ldar here happens after the load from [x0] at the call site
	   (that is generated by the compiler as part of the TLS access ABI),
	   so it reads the same value (this function is the final value of
	   td->entry) and thus it synchronizes with the release store to
	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
	   from [x0,#TLSDESC_ARG] here happens after the initialization of
	   td->arg.  */
	ldar	PTR_REG (zr), [x0]
	ldr	PTR_REG (1), [x0,#TLSDESC_ARG]		/* x1 = td = tdp->arg.  */
	ldr	PTR_REG (0), [x4,#TCBHEAD_DTV]		/* x0 = dtv.  */
	ldr	PTR_REG (3), [x1,#TLSDESC_GEN_COUNT]	/* x3 = td->gen_count.  */
	ldr	PTR_REG (2), [x0,#DTV_COUNTER]		/* x2 = dtv[0].counter.  */
	cmp	PTR_REG (3), PTR_REG (2)
	b.hi	2f		/* gen_count > counter (unsigned): slow path.  */
	ldr	PTR_REG (2), [x1,#TLSDESC_MODID]	/* x2 = ti_module.  */
	/* x0 = &dtv[ti_module]; the shift makes each entry two pointers wide.  */
	add	PTR_REG (0), PTR_REG (0), PTR_REG (2), lsl #(PTR_LOG_SIZE + 1)
	ldr	PTR_REG (0), [x0] /* Load val member of DTV entry.  */
	cmp	PTR_REG (0), #TLS_DTV_UNALLOCATED
	b.eq	2f		/* Block not allocated yet: slow path.  */
	ldr	PTR_REG (1), [x1,#TLSDESC_MODOFF]	/* x1 = ti_offset.  */
	add	PTR_REG (0), PTR_REG (0), PTR_REG (1)
	sub	PTR_REG (0), PTR_REG (0), PTR_REG (4)	/* Make it tp-relative.  */
1:
	/* Common exit: x0 holds the result, sp points at the fast-path
	   frame.  */
	ldp	 x1,  x2, [sp, #32+16*0]
	ldp	 x3,  x4, [sp, #32+16*1]

	ldp	x29, x30, [sp], #FASTPATH_FRAME_SIZE
	cfi_adjust_cfa_offset (-FASTPATH_FRAME_SIZE)
	ret
2:
	/* This is the slow path. We need to call __tls_get_addr() which
	   means we need to save and restore all the registers that the
	   callee will trash.  */

	/* CFI directives apply in address order, not in control-flow order.
	   The negative adjustment emitted for the ret above would otherwise
	   still be in effect here, although this code is only reached by the
	   branches taken while the fast-path frame is allocated.  Put the
	   frame size back so the CFA is right for the whole slow path.  */
	cfi_adjust_cfa_offset (FASTPATH_FRAME_SIZE)
	/* NOTE(review): the x29/x30 save slots are still not described by
	   any CFI register rule; consider adding them so that unwinding
	   past the bl below can recover the caller's return address.  */

	/* Save the remaining registers that we must treat as caller save.  */
# define NSAVEXREGPAIRS 7
	stp	 x5,  x6, [sp, #-16*NSAVEXREGPAIRS]!
	cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS)
	stp	 x7,  x8, [sp, #16*1]
	stp	 x9, x10, [sp, #16*2]
	stp	x11, x12, [sp, #16*3]
	stp	x13, x14, [sp, #16*4]
	stp	x15, x16, [sp, #16*5]
	stp	x17, x18, [sp, #16*6]

	SAVE_Q_REGISTERS

	mov	x0, x1			/* Argument: td, loaded above.  */
	bl	__tls_get_addr

	/* x4 was not preserved across the call, so read the thread pointer
	   again; x1 is reloaded from the frame at label 1.  */
	mrs	x1, tpidr_el0
	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)

	RESTORE_Q_REGISTERS

	ldp	 x7,  x8, [sp, #16*1]
	ldp	 x9, x10, [sp, #16*2]
	ldp	x11, x12, [sp, #16*3]
	ldp	x13, x14, [sp, #16*4]
	ldp	x15, x16, [sp, #16*5]
	ldp	x17, x18, [sp, #16*6]
	ldp	 x5,  x6, [sp], #16*NSAVEXREGPAIRS
	cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS)
	b	1b
	cfi_endproc
	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
# undef NSAVEXREGPAIRS
# undef FASTPATH_FRAME_SIZE

#endif // SHARED
#endif // __UCLIBC_HAS_TLS__
